##### ####################################################
#####                                               ######
#####                 Subset analyses            
#####                                               ######
##### ####################################################

# init ------------------------------------------------------------

# NOTE(review): rm(list = ls()) removes every object from the calling
# workspace. It does not detach packages or reset options, so it is not a
# substitute for running the script in a fresh R session.
rm(list=ls())
# Fix the RNG seed; presumably this makes the bootstrap draws made via
# bootstrap.function() reproducible -- confirm in utils.R.
set.seed(221186)

# Load libraries

library(data.table) # 1.11.4
library(mgcv) # 1.8-24
library(stargazer) # 5.2.2
library(DataCombine) # 0.2.21 for slide function
library(scales) # 1.0.0

# Some additional functions
# NOTE(review): bootstrap.function(), effect.func(), effect.func.gam() and
# mod_stargazer() are called below but are not defined in this file;
# presumably they are defined in utils.R.
source("utils.R")

# Load data
# Presumably provides the `speeches` table that every section below queries
# with data.table syntax -- verify against the .Rdata contents.

load("data/speeches.Rdata")

# Number of bootstrap replications passed to the main_reg_func() calls below.
nBootstrapReps <- 500

### ################################################
### Prepare subset data
### ################################################

# Collapse speech-level rows into one row per (subsection_id,
# debate_department) group.
#
# speech_rows  : an already-filtered subset of `speeches`.
# keep_weights : when FALSE the `model_weights` column (number of speech rows
#                in the group) is dropped from the result.
#
# Returns a data.table holding, per group: the share of words / speeches from
# rows with Gender == "F", the same shares divided by `prop_women`, the
# minister's gender and name, the ministry (as a factor), `yearmon`, and
# `yearmon.numeric` (= as.numeric(yearmon) - 1997).
summarise_debates <- function(speech_rows, keep_weights = TRUE) {
  debates <- speech_rows[, {
    from_women <- Gender == "F"
    words_share <- sum(word_count[from_women], na.rm = TRUE) / sum(word_count)
    speeches_share <- length(body[from_women]) / length(body)
    women_share <- unique(prop_women)
    list(prop.women.words = words_share,
         prop.women.speeches = speeches_share,
         ratio.women.words = words_share / women_share,
         ratio.women.speeches = speeches_share / women_share,
         minister.gender = unique(minister_gender),
         minister.name = unique(minister_name),
         ministry = unique(debate_department),
         yearmon = unique(yearmon),
         model_weights = .N)
  }, by = list(subsection_id, debate_department)]

  if (!keep_weights) debates$model_weights <- NULL

  debates$ministry <- as.factor(debates$ministry)
  debates$yearmon.numeric <- as.numeric(debates$yearmon) - 1997
  debates
}

# Write the number of distinct debates (subsection_id values) in `debates` to
# `path`. The sink is closed on exit even if cat() fails, so later console
# output is never silently diverted into the file.
write_debate_count <- function(debates, path) {
  sink(path)
  on.exit(sink(), add = TRUE)
  cat(length(unique(debates$subsection_id)))
  invisible(NULL)
}

# Full sample: speeches not made by the minister in the debate or the Speaker
base <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE])
write_debate_count(base, "latex/tables/usefulNumbers/total_debates_in_sample.tex")

# Question time: parent heading matches "oral answers"
qts <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             grepl("oral answers", parent, ignore.case = TRUE)])
write_debate_count(qts, "latex/tables/usefulNumbers/total_qt_debates.tex")

# Everything that is not question time
nonqts <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             !grepl("oral answers", parent, ignore.case = TRUE)])
write_debate_count(nonqts, "latex/tables/usefulNumbers/total_substantive_debates.tex")

# Flag speeches made by members of a governing party under each government
speeches$governing_party <- FALSE
speeches$governing_party[speeches$party_short == "Labour" & speeches$gov%in%c("Blair 1", "Blair 2", "Blair 3", "Brown")] <- TRUE
speeches$governing_party[speeches$party_short == "Conservative" & speeches$gov%in%c("Cameron", "Cameron 2", "May")] <- TRUE
speeches$governing_party[speeches$party_short == "LibDem" & speeches$gov%in%c("Cameron")] <- TRUE

# Opposition-party speeches only
opp <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             governing_party == FALSE])
write_debate_count(opp, "latex/tables/usefulNumbers/total_opposition_debates.tex")

# Governing-party speeches only (no count file is written for this subset)
gov <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             governing_party == TRUE])

# Labour speeches
lab <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             party_short == "Labour"])
write_debate_count(lab, "latex/tables/usefulNumbers/total_labour_debates.tex")

# Conservative speeches
con <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             party_short == "Conservative"])
write_debate_count(con, "latex/tables/usefulNumbers/total_conservative_debates.tex")

# Liberal Democrat and other-party speeches
ld <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             party_short %in% c("LibDem","Other")])
write_debate_count(ld, "latex/tables/usefulNumbers/total_other_party_debates.tex")

# Speeches strictly before / strictly after 2010-05-06; speeches dated exactly
# 2010-05-06 fall in neither subset.
before_2010 <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             hdate < as.Date("2010-05-06")])
write_debate_count(before_2010, "latex/tables/usefulNumbers/total_pre2010_debates.tex")

after_2010 <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             hdate > as.Date("2010-05-06")])
write_debate_count(after_2010, "latex/tables/usefulNumbers/total_post2010_debates.tex")

# Treat a missing opposition-minister flag as "not an opposition minister"
speeches$is_opposition_minister[is.na(speeches$is_opposition_minister)] <- FALSE

# Shadow ministers' speeches removed; this table carries no model_weights
# column, as in the original script.
excluding_shadow_ministers <- summarise_debates(
  speeches[minister_in_debate == FALSE & is_speaker == FALSE &
             is_opposition_minister == FALSE],
  keep_weights = FALSE)

### ################################################
### Subset Models
### ################################################

# Fit the ministry + month fixed-effects model on one subset and convert the
# minister-gender coefficient into an effect size.
#
# dv             : name of the outcome column in `my_data`.
# my_data        : debate-level table with columns `minister.gender`,
#                  `ministry`, `yearmon` and the column named by `dv`.
#                  NOTE(review): the default refers to a global `out`; every
#                  caller in this file passes `my_data` explicitly.
# nBootstrapReps : number of bootstrap replications passed to
#                  bootstrap.function().
#
# Returns list(regression = <bootstrap.function() result>,
#              effects    = list(<effect.func() result>)).
subset_effects <- function(dv = "prop.women.words", my_data = out, nBootstrapReps = 500){

  ## Exclude any missing values
  to_exclude <- which(is.na(my_data[[dv]]))
  if(length(to_exclude)!=0) my_data <- my_data[-to_exclude,]
  
  mod.6 <- as.formula(paste0(dv, " ~ minister.gender + ministry + as.factor(yearmon)"))
  boot.mod.6 <- bootstrap.function(mod.6, cluster="ministry", data=my_data, reps=nBootstrapReps)

  # Baseline = mean outcome in debates with a male minister. Previously this
  # took the first group returned by a by-gender aggregation, i.e. whichever
  # gender happened to appear first in the data. %in% keeps rows with a
  # missing minister gender out of the baseline.
  male_rows <- my_data$minister.gender %in% "M"
  if(!any(male_rows)) stop("subset_effects: no rows with minister.gender == \"M\"; cannot compute the male-minister baseline", call. = FALSE)
  baseline <- mean(my_data[[dv]][male_rows])
  
  mod.effects <- effect.func(boot.mod.6, 2, baseline)
  
  return(list(regression = boot.mod.6, effects = list(mod.effects)))
  
}

# Estimate the model on every subset. The call order is kept because each
# call runs a bootstrap.
base_effects                       <- subset_effects(my_data = base)
qts_effects                        <- subset_effects(my_data = qts)
nonqts_effects                     <- subset_effects(my_data = nonqts)
opp_effects                        <- subset_effects(my_data = opp)
lab_effects                        <- subset_effects(my_data = lab)
con_effects                        <- subset_effects(my_data = con)
ld_effects                         <- subset_effects(my_data = ld)
before_2010_effects                <- subset_effects(my_data = before_2010)
after_2010_effects                 <- subset_effects(my_data = after_2010)
excluding_shadow_ministers_effects <- subset_effects(my_data = excluding_shadow_ministers)

# Subsets in the order they appear in the table (the full sample is only used
# as a reference band in the plot).
subset_fits <- list(before_2010_effects,
                    after_2010_effects,
                    qts_effects,
                    nonqts_effects,
                    opp_effects,
                    lab_effects,
                    con_effects,
                    ld_effects,
                    excluding_shadow_ministers_effects)

# One row of effect estimates per subset
out <- data.frame(do.call("rbind", lapply(subset_fits, function(fit) fit$effects[[1]])))
out$model <- c("Before2010","After2010","QT","NonQT", "Opp", "Lab", "Con", "Other","Shadow")
out$text <- c("Pre-2010","Post-2010","Question Time debates","Substantive Debates", "Opposition MPs", "Labour MPs", "Conservative MPs", "Minor party MPs", "No shadow ministers")
# Reverse so that the first subset is drawn at the top of the plot
out <- out[nrow(out):1,]

# Full-sample estimate: element 1 is drawn as the reference line, elements 3
# and 2 as the left / right edge of the shaded band.
full_fx <- base_effects[[2]][[1]]
rows <- seq_len(nrow(out))
ticks <- seq(0,100,20)

pdf("plots/subset_effects.pdf",12,8)
par(cex = 1.4, lwd = 3, mar = c(5,0,0,0)+.1)
plot(out$effect, rows,
     xlim = range(c(0,out[,-c(4,5)],100)),
     pch = 19,
     ylab ="",
     yaxt = "n",
     ylim = c(1,nrow(out)+.5),
     xlab = "% increase in female participation relative to male minister baseline",
     xaxt = "n")
segments(x0 = out$lower, x1= out$upper, y0 = rows)
#axis(2, at = 1:nrow(out), out$model, las =2)
abline(v = 0, lty = 3)
abline(v = full_fx[1], lty = 2)
text(out$effect, rows+.25, labels = out$text)
rect(xleft = full_fx[3], xright = full_fx[2], ybottom = 0, ytop = 20, col = alpha("black",0.2), border = NA)
mtext("Full sample effect",1,1, at = c(full_fx[1]), cex = 1.4)
# Regular ticks, skipping any that coincide with the integer grid spanning the band
axis(1, at = ticks[!ticks%in%seq(full_fx[3],full_fx[2],1)])
# Unlabelled ticks at the band edges
axis(1, at = c(full_fx[3],full_fx[2]), labels = c("",""))
dev.off()

# Fixed-effect indicator rows: a label followed by one mark per model column
month.fe <- c("Month FEs","$\\checkmark$", rep("\\checkmark", 8))
ministry.fe <- c("Ministry FEs","$\\checkmark$", rep("\\checkmark", 8))

sink("latex/tables/subset_tables.tex")
mod_stargazer(
  before_2010_effects$regression,
  after_2010_effects$regression,
  qts_effects$regression,
  nonqts_effects$regression,
  opp_effects$regression,
  lab_effects$regression,
  con_effects$regression,
  ld_effects$regression,
  excluding_shadow_ministers_effects$regression,
  intercept.top=TRUE, intercept.bottom=FALSE, keep=c(1,2), order=c(2,1),
  # `out` was reversed for plotting, so rev() restores the column order
  add.lines = list(month.fe, ministry.fe,
                   c("Effect Size \\%", rev(out[,1])),
                   c("95\\% CI",paste0("[", rev(out[,3]),",", rev(out[,2]),"]"))),
  covariate.labels=c("Female minister","Constant"),
  column.sep.width="0.25pt", keep.stat=c("n","rsq","adj.rsq"),
  column.labels = c("Pre-2010", "Post-2010", "QT", "Non-QT", "Opp", "Lab", "Con", "Oth", "Exc-Shad"),
  dep.var.labels= "\\emph{PropWordsWomen}",
  dep.var.caption="",
  se=lapply(subset_fits, function(fit) fit$regression$boot.se),
  no.space=TRUE, model.names=FALSE)
sink()

### ################################################
### Main models, controlling for Speaker gender
### ################################################

# Debate-level table for the Speaker-gender models: one row per
# (subsection_id, debate_department), restricted to speeches with a known
# Speaker gender that were made by neither the minister nor the Speaker.
out <- speeches[
  minister_in_debate == FALSE & is_speaker == FALSE & !is.na(speaker_gender),
  {
    from_women <- Gender == "F"
    words_share <- sum(word_count[from_women], na.rm = TRUE) / sum(word_count)
    speeches_share <- length(body[from_women]) / length(body)
    women_share <- unique(prop_women)
    list(prop.women.words = words_share,
         prop.women.speeches = speeches_share,
         ratio.women.words = words_share / women_share,
         ratio.women.speeches = speeches_share / women_share,
         minister.gender = unique(minister_gender),
         opp.minister.gender = unique(opp_minister_gender),
         speaker.gender = unique(speaker_gender),
         minister.name = unique(minister_name),
         ministry = unique(debate_department),
         opp.ministry = unique(opp_debate_department),
         yearmon = unique(yearmon),
         model_weights = .N)
  },
  by = list(subsection_id, debate_department)]

# Time index measured from 1997
out$yearmon.numeric <- as.numeric(out$yearmon) - 1997

# Department identifiers are used as fixed effects / cluster ids
out$opp.ministry <- as.factor(out$opp.ministry)
out$ministry <- as.factor(out$ministry)

# Bespoke functions to estimate models, output latex tables, and calculate effect sizes

# Turn a list of seven fitted models into a table of effect sizes.
#
# mod.list : list whose elements 1-6 are passed to effect.func() and whose
#            element 7 is passed to effect.func.gam().
# title    : label stored in the `title` column of the full table.
# type     : which standard-error flavour to return ("Regular", "Clustered",
#            "Bootstrap"), or "all" for the full table.
# baseline : forwarded to effect.func() / effect.func.gam().
#
# Returns, for type == "all", every row with columns mod / effect columns /
# se / title; otherwise the "effect", "upper", "lower" columns of the rows
# whose `se` equals `type`, plus the model-7 row.
effect.sizes <- function(mod.list, title = "Proportion of words", type = "Bootstrap", baseline){
  smooth_fit <- mod.list[[7]]
  linear_fits <- mod.list[1:6]

  # Effect rows for the six linear models under one standard-error slot
  effects_for <- function(se_slot, se_label){
    per_model <- lapply(linear_fits, function(fit) effect.func(fit, baseline = baseline, se = se_slot))
    data.frame(mod = 1:6, do.call("rbind", per_model), se = se_label)
  }

  regular_rows <- effects_for("se", "Regular")
  clustered_rows <- effects_for("clustered.se", "Clustered")
  bootstrap_rows <- effects_for("boot.se", "Bootstrap")
  smooth_row <- data.frame(mod = 7, t(effect.func.gam(smooth_fit, baseline = baseline)), se = "Regular")

  effects.all <- rbind(regular_rows, clustered_rows, bootstrap_rows, smooth_row)
  effects.all$title <- title

  wanted <- effects.all$se == type | effects.all$mod == 7
  effects <- effects.all[wanted, c("effect", "upper", "lower")]

  if(type == "all"){
    return(effects.all)
  } else {
    return(effects)
  }

}

# Estimate the seven main specifications for one outcome and compute effect
# sizes relative to the "M" baseline of the treatment variable.
#
# dv             : name of the outcome column in `my_data`.
# iv             : right-hand-side terms shared by all specifications, as a
#                  string (e.g. "minister.gender + speaker.gender").
# main_iv        : name of the treatment column used to define the baseline
#                  group (rows where it equals "M").
# my_data        : debate-level data.table containing dv, the iv terms,
#                  `ministry`, `yearmon` and `yearmon.numeric`.
#                  NOTE(review): the default refers to a global `out`.
# nBootstrapReps : replications passed to bootstrap.function().
# file_out, ...  : accepted but not used inside this function.
#
# Returns list(<list of 7 fitted models>, <effect.sizes() bootstrap table>).
# Models 1-6 come from bootstrap.function(); model 7 is an mgcv::gam fit.
main_reg_func <- function(dv = "prop.women.words", iv = "minister.gender + speaker.gender", main_iv = "minister.gender", my_data = out, nBootstrapReps = 10, file_out = "latex/tables/words_prop_boot_tables.tex", ...){
  
  # (1) treatment terms only
  mod.1 <- as.formula(paste0(dv, " ~ ",iv))
  boot.mod.1 <- bootstrap.function(mod.1, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  # (2) + month fixed effects
  mod.2 <- as.formula(paste0(dv, " ~ ",iv, " + as.factor(yearmon)"))
  boot.mod.2 <- bootstrap.function(mod.2, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  # (3) + ministry fixed effects
  mod.3 <- as.formula(paste0(dv, " ~ ",iv, " + ministry"))
  boot.mod.3 <- bootstrap.function(mod.3, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  # (4) + ministry and month fixed effects
  mod.4 <- as.formula(paste0(dv, " ~ ",iv, " + ministry + as.factor(yearmon)"))
  boot.mod.4 <- bootstrap.function(mod.4, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  # (5) + ministry-specific linear time trends
  mod.5 <- as.formula(paste0(dv, " ~ ",iv, " + ministry*yearmon.numeric + as.factor(yearmon) "))
  boot.mod.5 <- bootstrap.function(mod.5, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  # (6) + ministry-specific quadratic time trends
  mod.6 <- as.formula(paste0(dv, " ~ ",iv, " + ministry*poly(yearmon.numeric,2) + as.factor(yearmon) "))
  boot.mod.6 <- bootstrap.function(mod.6, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  # (7) ministry-specific smooth time trends, fitted with gam (not bootstrapped)
  mod.gam <- as.formula(paste0(dv," ~ ",iv, "  + s(as.numeric(yearmon),by=ministry) + ministry + as.factor(yearmon) "))
  boot.mod.gam <- gam(mod.gam, data=my_data)
  
  mod.list <- list(boot.mod.1, boot.mod.2, boot.mod.3, boot.mod.4, boot.mod.5, boot.mod.6, boot.mod.gam)
  
  #baseline <- 0.1765623 # Hardcode baseline from full sample used in main analysis
  # Mean outcome by treatment group; `by = get(main_iv)` names the grouping
  # column "get".
  baseline <- my_data[,mean(get(dv)),by=get(main_iv)]#$V1#[1]
  # which() drops the NA that `== "M"` yields for a missing-treatment group;
  # without it the baseline would become a vector with an extra NA element.
  baseline <- baseline$V1[which(baseline$get == "M")]
  
  mod.effects <- effect.sizes(mod.list, type = "Bootstrap", baseline = baseline)
  
  return(list(mod.list, mod.effects))
  
}


# Write the seven-column regression table for one main_reg_func() result.
#
# boot.out.list : value returned by main_reg_func(): element 1 is the list of
#                 seven fitted models, element 2 the effect-size table.
# file_out      : path of the .tex file to write.
# ...           : forwarded to mod_stargazer() (e.g. title, label, keep,
#                 order, covariate.labels, dep.var.labels).
#
# Called for its side effect of writing `file_out`.
stargazer.multi.func <- function(boot.out.list,file_out,...){
  
  boot.mod.1 <- boot.out.list[[1]][[1]]
  boot.mod.2 <- boot.out.list[[1]][[2]]
  boot.mod.3 <- boot.out.list[[1]][[3]]
  boot.mod.4 <- boot.out.list[[1]][[4]]
  boot.mod.5 <- boot.out.list[[1]][[5]]
  boot.mod.6 <- boot.out.list[[1]][[6]]
  boot.mod.gam <- boot.out.list[[1]][[7]]
  
  mod.effects <- boot.out.list[[2]]
  
  
  # Indicator rows: a label followed by one mark per model column
  month.fe <- c("Month FEs","$\\times$","\\checkmark","$\\times$","\\checkmark","\\checkmark","\\checkmark","\\checkmark")
  ministry.fe <- c("Ministry FEs","$\\times$","$\\times$","\\checkmark","\\checkmark","\\checkmark","\\checkmark","\\checkmark")
  linear.tt <- c("Linear time trends","$\\times$","$\\times$","$\\times$","$\\times$","\\checkmark","\\checkmark","$\\times$")
  quadratic.tt <- c("Quadratic time trends","$\\times$","$\\times$","$\\times$","$\\times$","$\\times$","\\checkmark","$\\times$")
  smooths <- c("Flexible time trends","$\\times$", "$\\times$","$\\times$","$\\times$","$\\times$","$\\times$","\\checkmark")
  
  # Divert console output into the table file. The sink is released on exit
  # so that an error inside mod_stargazer() cannot leave output redirected
  # for the rest of the session.
  sink(file=file_out)
  on.exit(sink(), add = TRUE)
  mod_stargazer(
    boot.mod.1, boot.mod.2, boot.mod.3, boot.mod.4, boot.mod.5, boot.mod.6, boot.mod.gam,
    intercept.top=T, intercept.bottom=F, 
    add.lines = list(month.fe, ministry.fe, linear.tt, quadratic.tt, smooths,
                     c("Effect Size \\%", mod.effects[,1]),
                     c("95\\% CI",paste("[", mod.effects[,3],",", mod.effects[,2],"]",sep=""))), 
    column.sep.width="0.25pt", keep.stat=c("n","rsq","adj.rsq"),
    dep.var.caption="",
    # Bootstrap SEs for models 1-6; the gam column uses its summary() SEs
    se=list(boot.mod.1$boot.se , boot.mod.2$boot.se , boot.mod.3$boot.se, boot.mod.4$boot.se, boot.mod.5$boot.se, boot.mod.6$boot.se, summary(boot.mod.gam)$se), 
    no.space=T, model.names=F,...)
  
}


# Proportion words ------------------------------------------------------------ 

# Keep only debates with a recorded Speaker gender
speaker_sample <- out[!is.na(out$speaker.gender), ]

# Minister gender as treatment, Speaker gender as an additional control
prop.women.words.out.speaker <- main_reg_func(
  dv = "prop.women.words",
  iv = "minister.gender + speaker.gender",
  main_iv = "minister.gender",
  my_data = speaker_sample,
  nBootstrapReps = nBootstrapReps
)

# Table rows: minister coefficient, Speaker coefficient, then the constant
stargazer.multi.func(
  prop.women.words.out.speaker,
  file_out = "latex/tables/words_prop_boot_tables_speaker_gender.tex",
  title = "",
  label = "main_results_prop_boot_speaker_gender",
  dep.var.labels = "\\emph{PropWordsWomen}",
  keep = c(1,2,3),
  order = c(2,3,1),
  covariate.labels = c("Female minister","Female Speaker","Constant")
)



### ################################################
### Main models, with Opposition minister gender as treatment
### ################################################

# Recode a missing minister_in_debate flag as FALSE.
# NOTE(review): this modifies `speeches` for the rest of the script, so every
# later `minister_in_debate == F` filter also keeps rows that were previously
# dropped because the flag was NA -- confirm this is intended.
speeches$minister_in_debate[is.na(speeches$minister_in_debate)] <- FALSE

# Debates with an opposition minister present and no government minister
# present; speeches by the opposition minister in the debate and by the
# Speaker are excluded. One row per subsection_id.
out <- speeches[
  opp_minister_in_debate == FALSE & is_speaker == FALSE &
    opposition_minister_present == TRUE & minister_present == FALSE,
  {
    from_women <- Gender == "F"
    words_share <- sum(word_count[from_women], na.rm = TRUE) / sum(word_count)
    speeches_share <- length(body[from_women]) / length(body)
    women_share <- unique(prop_women)
    list(prop.women.words = words_share,
         prop.women.speeches = speeches_share,
         ratio.women.words = words_share / women_share,
         ratio.women.speeches = speeches_share / women_share,
         minister.gender = unique(minister_gender),
         opp.minister.gender = unique(opp_minister_gender),
         speaker.gender = unique(speaker_gender),
         ministry = unique(opp_debate_department),
         yearmon = unique(yearmon),
         model_weights = .N)
  },
  by = list(subsection_id)]

# Here `ministry` holds the opposition minister's department
out$ministry <- as.factor(out$ministry)
out$yearmon.numeric <- as.numeric(out$yearmon) - 1997

# Opposition (shadow) minister gender is both the regressor and the baseline
# grouping variable
prop.women.words.out.opposition <- main_reg_func(
  dv = "prop.women.words",
  iv = "opp.minister.gender",
  main_iv = "opp.minister.gender",
  my_data = out,
  nBootstrapReps = nBootstrapReps
)

stargazer.multi.func(
  prop.women.words.out.opposition,
  file_out = "latex/tables/words_prop_boot_tables_opp_minister_gender.tex",
  title = "",
  label = "main_results_prop_boot_opp_minister_gender",
  dep.var.labels = "\\emph{PropWordsWomen}",
  keep = c(1,2),
  order = c(2,1),
  covariate.labels = c("Female shadow minister","Constant")
)

### ################################################
### Main models, with Opposition ministers excluded
### ################################################

# Treat a missing opposition-minister flag as "not an opposition minister"
speeches$is_opposition_minister[is.na(speeches$is_opposition_minister)] <- F

# Debate-level table with speeches by the minister in the debate, the Speaker
# and opposition ministers removed; one row per
# (subsection_id, debate_department).
out <- speeches[minister_in_debate==F & is_speaker == F & is_opposition_minister == F,
                list(prop.women.words=sum(word_count[Gender=="F"],na.rm=T)/sum(word_count),
                     prop.women.speeches=length(body[Gender=="F"])/length(body),
                     ratio.women.words=(sum(word_count[Gender=="F"],na.rm=T)/sum(word_count))/unique(prop_women),
                     ratio.women.speeches=(length(body[Gender=="F"])/length(body))/unique(prop_women),
                     minister.gender=unique(minister_gender),
                     minister.name=unique(minister_name),
                     ministry = unique(debate_department),
                     yearmon = unique(yearmon)), by = list(subsection_id, debate_department)]

out$ministry <- as.factor(out$ministry)

out$yearmon.numeric <- as.numeric(out$yearmon)-1997

# Debates without a recorded minister gender cannot enter the model
out <- out[!is.na(out$minister.gender)]

# Proportion words ------------------------------------------------------------ 

# main_reg_func()'s data argument is `my_data`. The previous `data = out` was
# silently absorbed by `...`, and the call only worked because the default
# `my_data = out` resolved to the same global object.
prop.women.words.out.shadow <- main_reg_func(dv = "prop.women.words", 
                                             iv = "minister.gender",
                                             my_data = out, 
                                             nBootstrapReps = nBootstrapReps)

stargazer.multi.func(prop.women.words.out.shadow, 
                     file_out = "latex/tables/words_prop_boot_tables_shadow.tex", 
                     title="", 
                     label="main_results_prop_boot_shadow",
                     dep.var.labels = "\\emph{PropWordsWomen} (Opposition ministers excluded)",
                     keep = c(1,2),
                     order = c(2,1),
                     covariate.labels = c("Female minister","Constant"))




### ################################################
### Main models, Conservative Party
### ################################################

# Conservative speeches only (minister-in-debate and Speaker rows excluded),
# collapsed to one row per (subsection_id, debate_department).
con <- speeches[
  minister_in_debate == FALSE & is_speaker == FALSE & party_short == "Conservative",
  {
    from_women <- Gender == "F"
    words_share <- sum(word_count[from_women], na.rm = TRUE) / sum(word_count)
    speeches_share <- length(body[from_women]) / length(body)
    women_share <- unique(prop_women)
    list(prop.women.words = words_share,
         prop.women.speeches = speeches_share,
         ratio.women.words = words_share / women_share,
         ratio.women.speeches = speeches_share / women_share,
         minister.gender = unique(minister_gender),
         minister.name = unique(minister_name),
         ministry = unique(debate_department),
         yearmon = unique(yearmon),
         model_weights = .N)
  },
  by = list(subsection_id, debate_department)]

# Drop debates with a missing value in any column
con <- con[complete.cases(con), ]
con$yearmon.numeric <- as.numeric(con$yearmon) - 1997
con$ministry <- as.factor(con$ministry)

# Full set of specifications on the Conservative-only sample
prop.women.words.out.con <- main_reg_func(
  dv = "prop.women.words",
  iv = "minister.gender",
  my_data = con,
  nBootstrapReps = nBootstrapReps
)

stargazer.multi.func(
  prop.women.words.out.con,
  file_out = "latex/tables/words_prop_boot_tables_conservative.tex",
  title = "",
  label = "main_results_prop_boot_conservative",
  dep.var.labels = "\\emph{PropWordsWomen} (Conservative MPs)",
  keep = c(1,2),
  order = c(2,1),
  covariate.labels = c("Female minister","Constant")
)

### ################################################
### Main models, Labour Party
### ################################################

# Labour speeches only (minister-in-debate and Speaker rows excluded),
# collapsed to one row per (subsection_id, debate_department).
lab <- speeches[
  minister_in_debate == FALSE & is_speaker == FALSE & party_short == "Labour",
  {
    from_women <- Gender == "F"
    words_share <- sum(word_count[from_women], na.rm = TRUE) / sum(word_count)
    speeches_share <- length(body[from_women]) / length(body)
    women_share <- unique(prop_women)
    list(prop.women.words = words_share,
         prop.women.speeches = speeches_share,
         ratio.women.words = words_share / women_share,
         ratio.women.speeches = speeches_share / women_share,
         minister.gender = unique(minister_gender),
         minister.name = unique(minister_name),
         ministry = unique(debate_department),
         yearmon = unique(yearmon),
         model_weights = .N)
  },
  by = list(subsection_id, debate_department)]

# Drop debates with a missing value in any column
lab <- lab[complete.cases(lab), ]
lab$yearmon.numeric <- as.numeric(lab$yearmon) - 1997
lab$ministry <- as.factor(lab$ministry)

# Full set of specifications on the Labour-only sample
prop.women.words.out.lab <- main_reg_func(
  dv = "prop.women.words",
  iv = "minister.gender",
  my_data = lab,
  nBootstrapReps = nBootstrapReps
)

stargazer.multi.func(
  prop.women.words.out.lab,
  file_out = "latex/tables/words_prop_boot_tables_labour.tex",
  title = "",
  label = "main_results_prop_boot_labour",
  dep.var.labels = "\\emph{PropWordsWomen} (Labour MPs)",
  keep = c(1,2),
  order = c(2,1),
  covariate.labels = c("Female minister","Constant")
)


### ################################################
### Over-time, within ministry spillovers 
### ################################################

# Debate-level table (one row per subsection_id x debate_department) that also
# carries the debate date, used to locate debates that fall shortly after a
# female minister's last recorded debate in the same department.
out <- speeches[minister_in_debate==F & is_speaker == F ,
                list(prop.women.words = sum(word_count[Gender == "F"], na.rm = T)/sum(word_count),
                     prop.women.speeches = length(body[Gender == "F"])/length(body),
                     ratio.women.words = (sum(word_count[Gender == "F"],na.rm=T)/sum(word_count))/unique(prop_women),
                     ratio.women.speeches = (length(body[Gender == "F"])/length(body))/unique(prop_women),
                     minister.gender = unique(minister_gender),
                     minister.name = unique(minister_name),
                     ministry = unique(debate_department),
                     yearmon = unique(yearmon),
                     Speech.Date = unique(hdate),
                     model_weights = .N), 
                by = list(subsection_id, debate_department)]
out <- out[complete.cases(out)]
out$debate_department <- as.factor(out$debate_department)
out$ministry <- as.factor(out$ministry)
out$yearmon.numeric <- as.numeric(out$yearmon)
out$within1 <- out$within2 <- out$within3 <- out$within4 <- out$within5 <- out$within6 <- F

months <- c(1:6)

for(n_months in months){
  
  # Window per (female minister, department): from her last debate date (V1)
  # to 30 * n_months days later (stop)
  female_start_stop <- out[minister.gender == "F", max(Speech.Date),by = list(minister.name,debate_department)]
  female_start_stop$stop <- female_start_stop$V1 + 30*n_months
  # foverlaps() needs an interval on both sides; a debate is the degenerate
  # interval [Speech.Date, Speech.Date]
  out$Speech.Date2 <- out$Speech.Date
  
  # setkey() reorders `out` by reference; the xid values returned below index
  # the reordered table
  setkey(out, debate_department, Speech.Date, Speech.Date2)
  setkey(female_start_stop, debate_department, V1, stop)
  
  which_overlaps <- foverlaps(out, female_start_stop, by.x=c("debate_department","Speech.Date", "Speech.Date2"), by.y = c("debate_department","V1","stop"), which = T)
  
  # Rows of `out` falling inside at least one window get the cumulative
  # "within n_months" flag (replaces six copy-pasted if() lines)
  matched_rows <- unique(which_overlaps[!is.na(which_overlaps$yid)]$xid)
  data.table::set(out, i = matched_rows, j = paste0("within", n_months), value = TRUE)
  
}

# Turn the cumulative flags into mutually exclusive bins. This must run from
# the widest window down, because each step relies on the next-narrower flag
# still being cumulative.
out$within6[out$within5] <- F
out$within5[out$within4] <- F
out$within4[out$within3] <- F
out$within3[out$within2] <- F
out$within2[out$within1] <- F

spillovers.models <- main_reg_func(dv = "prop.women.words", 
                                              iv = "minister.gender + within1 + within2 + within3 + within4 + within5 + within6",
                                              my_data = out, 
                                              nBootstrapReps = nBootstrapReps)

# The model has eight leading coefficients (constant, minister gender, six
# window dummies) and eight covariate labels. keep/order previously listed
# only seven positions, which dropped the 5-6 month dummy and shifted the
# labels onto the wrong rows.
stargazer.multi.func(spillovers.models, 
                     file_out = "latex/tables/time_spillovers.tex", 
                     title="", 
                     label="",
                     dep.var.labels = "\\emph{PropWordsWomen}",
                     keep = c(2:8, 1),
                     order = c(2:8, 1),
                     covariate.labels = c("Female minister","0-1 month after","1-2 months after", "2-3 months after","3-4 months after","4-5 months after","5-6 months after","Constant"))

